package com.markchu.admin.web;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Command-line scraper for one dianping.com listing page.
 *
 * <p>For every {@code #shop-all-list li} entry on the listing page it reads the
 * thumbnail URL, the tag/address summary and the link to the shop detail page,
 * then fetches the detail page and reads the breadcrumb, shop name, address,
 * telephone and the first {@code .J-other span.item} text. The collected values
 * are printed to standard output and handed to {@code DataWriter.insertInto}
 * (defined elsewhere in the project; presumably it persists the record).
 *
 * <p>Any field whose selector matches nothing is recorded as an empty string
 * instead of aborting the whole run.
 *
 * <p>NOTE(review): the Cookie header values below embed a logged-in session
 * (e.g. the {@code dper} token). Credentials should not live in source control;
 * consider loading them from configuration or an environment variable.
 */
public class LoginExample {

    /** Listing page to crawl. */
    private static final String LIST_URL = "https://www.dianping.com/shanghai/ch95/p13";

    /** Browser User-Agent sent with every request. */
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36";

    /** Part of the Cookie header that is identical for the listing and the detail requests. */
    private static final String COOKIE_PREFIX = "_lxsdk_cuid=18cbf310eaac8-078faebc14fa77-1f525637-13c680-18cbf310eaac8; _lxsdk=18cbf310eaac8-078faebc14fa77-1f525637-13c680-18cbf310eaac8; _hc.v=8e619f5e-d474-a61b-d608-0ce877981b51.1704014713; s_ViewType=10; WEBDFPID=v3ux962902z25uy003z808zu5v8uy7u081xyu685256979583u295x7v-2019374727748-1704014727748WEGIQAC75613c134b6a252faa6802015be905511095; cy=1; cye=shanghai; fspop=test; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; Hm_lvt_602b80cf8079ae6591966cc70a3940e7=1704014713,1704949972; ctu=f04bac7c942de6fc2bebbe94943784b7740902aab5f29ce9a9df328d2a66488c; qruuid=fa1e9002-791d-420c-93f7-f43e879cfae8; dplet=d917f22611369bb919a7108635ca8cf3; dper=a5306a1fcee38fd66d858cf1ab500b2b742e46a2bfb941ffda6912aff2385d5e684b3899f02ba2bf49b535886db438a04549e2f0e220a89188ebe4de3e0be6e4; ll=7fd06e815b796be3df069dec7836c3df; ua=cwftalus; ";

    /** Full Cookie header for the listing request. */
    private static final String LIST_COOKIE = COOKIE_PREFIX + "Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1705037370; _lxsdk_s=18cfc2531db-e12-8e8-a9c%7C%7C61";

    /** Full Cookie header for the shop detail requests (differs only in the trailing tracking values). */
    private static final String DETAIL_COOKIE = COOKIE_PREFIX + "Hm_lpvt_602b80cf8079ae6591966cc70a3940e7=1704962034; _lxsdk_s=18cf78e6ad9-60a-ebf-";

    /**
     * Crawls the listing page and every shop detail page it links to.
     *
     * @param args ignored
     * @throws Exception if any HTTP request fails or {@code DataWriter.insertInto} throws
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): both pages are fetched with POST and no form data, as in the
        // original code; confirm whether a plain GET was intended.
        Document listPage = openConnection(LIST_URL, LIST_COOKIE).post();
        Elements shops = listPage.select("#shop-all-list li");
        System.out.println(shops.size()); // number of shop entries found on the listing page

        int i = 0;
        for (Element shop : shops) {
            i++;
            System.out.println("开始处理" + i);

            String picture = firstAttr(shop.select(".pic img"), "src");
            System.out.println("picture" + picture);

            String info = firstText(shop.select(".tag-addr"));
            System.out.println(info);

            // "abs:" resolves relative and protocol-relative links against the page URL,
            // because Jsoup.connect only accepts absolute http(s) URLs.
            String curl = firstAttr(shop.select(".tit a"), "abs:href");
            System.out.println(curl);
            if (curl.isEmpty()) {
                // Without a detail link there is nothing to fetch; move on to the next shop.
                System.err.println("No detail link for entry " + i + ", skipped");
                continue;
            }

            Document detailPage = openConnection(curl, DETAIL_COOKIE).post();

            String area = firstText(detailPage.select(".breadcrumb"));
            String shop_name = firstText(detailPage.select(".shop-name"));
            // Elements.text() joins the text of every match and is "" when nothing matches.
            String address = detailPage.select("#address").text();
            String tel = firstText(detailPage.select(".tel"));
            String item = firstText(detailPage.select(".J-other span.item"));

            System.out.println(shop_name);
            System.out.println(address);
            System.out.println(tel);
            System.out.println(item);
            DataWriter.insertInto(shop_name, area, address, info, tel, picture, item);
        }
    }

    /** Creates a jsoup connection to {@code url} carrying the shared User-Agent and the given Cookie header. */
    private static Connection openConnection(String url, String cookie) {
        Connection connection = Jsoup.connect(url);
        connection.header("User-Agent", USER_AGENT);
        connection.header("Cookie", cookie);
        return connection;
    }

    /** Returns the text of the first matched element, or {@code ""} when nothing matched. */
    private static String firstText(Elements matches) {
        Element first = matches.first();
        return first == null ? "" : first.text();
    }

    /** Returns the named attribute of the first matched element, or {@code ""} when nothing matched. */
    private static String firstAttr(Elements matches, String attributeKey) {
        Element first = matches.first();
        return first == null ? "" : first.attr(attributeKey);
    }
}